#!/usr/bin/python
# -*- coding:utf-8 -*-
# @author  : micah
# @time    : 2023/11/29 22:44
# @function: demonstrates BeautifulSoup CSS selectors (soup.select) on the Douban Top 250 page.
# @version : 

from bs4 import BeautifulSoup
import requests

# Page that is downloaded and parsed below.
url = 'https://movie.douban.com/top250'
headers = {
    # Browser-like user agent; presumably needed so the site does not reject
    # the default python-requests client -- TODO confirm.
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'

}

# requests applies no timeout by default, so set one to avoid hanging forever
# on an unresponsive server.
page = requests.get(url, headers=headers, timeout=10)
# Surface 4xx/5xx responses as an HTTPError instead of silently parsing an
# error page as if it were the real document.
page.raise_for_status()
# `response` holds the decoded HTML text, not the Response object.
response = page.text

# Extract data
# print(response)
soup = BeautifulSoup(response, 'lxml')

# Tag selector
# title = soup.select('title')
# print(title)

# Class selector
# title = soup.select('.downlinknoli')
# print(title)

# ID selector
# id_data = soup.select('#page')
# print(id_data)

# Descendant (hierarchy) selector
# id_data = soup.select('#page #navigationtop')
# print(id_data)
#
# Attribute selector
# tr_list = soup.select('tr[align="center"]')
# print(tr_list)

# Get the text content of a tag
# tr_list = soup.select('h1')
# print(tr_list[0].text)
# print(tr_list[0].string)
# print(tr_list[0].get_text())
# print(tr_list[0].getText())

# for i in tr_list:
#     print(i.get('href'))

# Pseudo-class selector
# tr_list = soup.select('tr td:nth-child(1)')
# print(tr_list)

# Get an attribute: print the `class` attribute of the first <ol> on the page.
html_ol = soup.select('ol')
if html_ol:
    # .get() returns None when the tag has no `class` attribute.
    print(html_ol[0].get('class'))
else:
    # select() returns an empty list when nothing matches; indexing [0]
    # would raise IndexError, so report the situation explicitly instead.
    print('No <ol> element found in the page')



